library(haven)
library(ggplot2)
library(tidyr)
library(dplyr)
library(stringr)

setwd("C:/Users/jonord/Dropbox/Papers/SSP-Instability-Opinion/")
setwd("E:/Dropbox/Papers/SSP-Instability-Opinion/")
setwd("D:/Dropbox/Papers/SSP-Instability-Opinion/")
setwd("C:/Users/jonas/Dropbox/Papers/SSP-Instability-Opinion/")
setwd("~/Dropbox/Papers/SSP-Instability-Opinion/")

growth <- function(x){x/lag(x)-1}

pwt <- read_dta("data/pwt90.dta")
pwt2gwno <- read.csv("data/pwt2gwno.csv", stringsAsFactors = FALSE, strip.white = TRUE)

pwt <- pwt[which(!is.na(pwt$rgdpna)),]

# Each country start at a different year
#View(group_by(df, countrycode) %>% summarise(min(year)))

source("code/brd_old.R")


ucdpyearly <- merge(ucdpyearly, pwt2gwno, by.x="GWNoBattle", by.y="gwno")
df <- merge(pwt, ucdpyearly, by.x=c("country", "year"), by.y=c("country", "Year"), all.x=TRUE)

# Exclude ARE, QAT, KWT, BRN, SAU, BHR because they are extreme outliers, especially before, during and after the oil crisis in the 70s.
oil <- c("ARE", "QAT", "KWT", "BRN", "SAU", "BHR")

df$BdBest <- if_else(is.na(df$BdBest), 0, df$BdBest)

df <- group_by(df, countrycode) %>%
  filter(min(year)<=1970) %>%
  filter(!countrycode %in% oil) %>%
  filter(year >=1970) %>%
  mutate(grwt = growth(rgdpna/pop),
         Bdpop = (BdBest/pop)) %>%
  summarise(sumna = sum(is.na(rgdpna)),
            numyears = n(),
            mean_grwt = mean(grwt, na.rm=T),
            num_under_0 = sum(grwt<0, na.rm=T),
            neggrwt = mean(grwt<0, na.rm=T),
            aBdpop = mean(Bdpop, na.rm=T),
            cyears = mean(Bdpop>50, na.rm=T),
            sd_grwt = sd(grwt, na.rm=T),
            gdpcap1970 = rgdpna[which(year==1970)]/pop[which(year==1970)],
            gdpcap2014 = rgdpna[which(year==2014)]/pop[which(year==2014)])


df$y <- log(df$gdpcap2014/df$gdpcap1970)
df$x <- log(df$gdpcap1970)

# I know b coefficients are negative! Therefore, I transform to abs(coef), and use - instead of + in equation!
lm_eqn <- function(y, x){
  m <- lm(y ~ x)
  eq <- substitute(italic(y) == a - b * italic(x), 
                   list(a = format(coef(m)[1], digits = 2), 
                        b = format(abs(coef(m)[2]), digits = 2), 
                        r2 = format(summary(m)$r.squared, digits = 3)))
  as.character(as.expression(eq))
}


df$label <- lm_eqn(df$y, df$x)

df$Bdpop <- df$Bdpop / df$numyears

df$Bdpopcat <- if_else(df$Bdpop== 0, 0L, NA_integer_)
df$Bdpopcat <- if_else(df$Bdpop > 0 & df$Bdpop <= 10, 1L, df$Bdpopcat)
df$Bdpopcat <- if_else(df$Bdpop > 10 & df$Bdpop <= 100, 2L, df$Bdpopcat)
df$Bdpopcat <- if_else(df$Bdpop > 100, 3L, df$Bdpopcat)
df$Bdpopcat <- factor(df$Bdpopcat, labels=c("0", "1-10", "11-100", "101-1188"))


df$peace <- factor(if_else(df$Bdpop == 0, 1L, 0L))


p1 <- ggplot(df, aes(x=x, y=y, label=label, color=Bdpopcat, shape=peace)) + geom_point(size=3) +
  geom_smooth(data=df, aes(x=x, y=y), inherit.aes=FALSE, method = "lm", se=FALSE, color="red", formula = y ~ x) +
  geom_text(data=df, aes(x=x, y=y, label=label), inherit.aes=FALSE, x=8, y=3, size=6, parse=TRUE) +
  ylab("log(GDP/capita 2014 / GDP/capita 1970)") + xlab("log(GDP/capita in 1970)") +
  coord_equal() +
  scale_color_manual("Avg. battle deaths per million pop. per year", 
                     values=c("#000000", "#fdcc8a", "#fc8d59", "#d7301f")) +
  scale_shape_manual(values=c(19, 21), guide=FALSE) +
  theme_bw() +
  theme(strip.text.x = element_text(size=12),
        axis.title = element_text(size=12),
        axis.text = element_text(size=12),
        legend.position = "bottom",
        legend.text = element_text(size=12),
        legend.title = element_text(size=12)) +
  guides(color=guide_legend(nrow=2,byrow=TRUE))

ggsave("results/avg_growth_historical2_BattleDeathsFixed.tiff", plot=p1, width=6, height=6, dpi=300, compression="zip")